%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/


%% Created for Stefan De Treville Wager at 2014-05-08 15:49:18 -0700 


%% Saved with string encoding Unicode (UTF-8)

  @article{tmle,
    author  = {Gruber, Susan and van der Laan, Mark J.},
    title   = {{tmle}: An {R} Package for Targeted Maximum Likelihood Estimation},
    journal = {Journal of Statistical Software},
    volume  = {51},
    number  = {13},
    pages   = {1--35},
    year    = {2012},
    doi     = {10.18637/jss.v051.i13},
    url     = {http://www.jstatsoft.org/v51/i13/},
  }

@article{bonhomme2015grouped,
  author    = {Bonhomme, St{\'e}phane and Manresa, Elena},
  title     = {Grouped patterns of heterogeneity in panel data},
  journal   = {Econometrica},
  volume    = {83},
  number    = {3},
  pages     = {1147--1184},
  year      = {2015},
  publisher = {Wiley Online Library},
}

@inproceedings{mikolov2013efficient,
  author    = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title     = {Efficient estimation of word representations in vector space},
  booktitle = {International Conference on Learning Representations},
  year      = {2013},
}

@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  pages     = {1532--1543},
  year      = {2014},
}

@article{neyman1948consistent,
  author    = {Neyman, Jerzy and Scott, Elizabeth L},
  title     = {Consistent estimates based on partially consistent observations},
  journal   = {Econometrica},
  volume    = {16},
  number    = {1},
  pages     = {1--32},
  year      = {1948},
  publisher = {JSTOR},
}

@book{diggle2002analysis,
  author    = {Diggle, Peter J and Heagerty, Patrick J and Liang, Kung-Yee and Zeger, Scott},
  title     = {Analysis of longitudinal data},
  publisher = {Oxford University Press},
  year      = {2002},
}

@book{angrist2008mostly,
  author    = {Angrist, Joshua D and Pischke, J{\"o}rn-Steffen},
  title     = {Mostly harmless econometrics: An empiricist's companion},
  publisher = {Princeton University Press},
  year      = {2008},
}

@book{wooldridge2010econometric,
  author    = {Wooldridge, Jeffrey M},
  title     = {Econometric analysis of cross section and panel data},
  publisher = {MIT Press},
  year      = {2010},
}

@article{luedtke2016statistical,
  author    = {Luedtke, Alexander R and van der Laan, Mark J},
  title     = {Statistical inference for the mean outcome under a possibly non-unique optimal treatment strategy},
  journal   = {The Annals of Statistics},
  volume    = {44},
  number    = {2},
  pages     = {713--742},
  year      = {2016},
  publisher = {Institute of Mathematical Statistics},
}

@article{luedtke2016optimal,
  author    = {Luedtke, Alexander R and van der Laan, Mark J},
  title     = {Optimal individualized treatments in resource-limited settings},
  journal   = {The International Journal of Biostatistics},
  volume    = {12},
  number    = {1},
  pages     = {283--303},
  year      = {2016},
  publisher = {De Gruyter},
}

@article{bertsimas2017optimal,
  author    = {Bertsimas, Dimitris and Dunn, Jack},
  title     = {Optimal classification trees},
  journal   = {Machine Learning},
  volume    = {106},
  number    = {7},
  pages     = {1039--1082},
  year      = {2017},
  publisher = {Springer},
}

@article{rudin2018big,
  author  = {Ban, Gah-Yi and Rudin, Cynthia},
  title   = {The big data newsvendor: Practical insights from machine learning},
  journal = {Operations Research},
  year    = {2018},
  note    = {Forthcoming},
}

@article{fithian2014optimal,
  author  = {Fithian, William and Sun, Dennis and Taylor, Jonathan},
  title   = {Optimal inference after model selection},
  journal = {arXiv preprint arXiv:1410.2597},
  year    = {2014},
}

@article{hirshberg2018balancing,
  author  = {Hirshberg, David A and Wager, Stefan},
  title   = {Augmented Minimax Linear Estimation},
  journal = {arXiv preprint arXiv:1712.00038},
  year    = {2018},
}

@article{kunzel2017meta,
  author  = {K{\"u}nzel, S{\"o}ren R and Sekhon, Jasjeet S and Bickel, Peter J and Yu, Bin},
  title   = {Meta-learners for Estimating Heterogeneous Treatment Effects using Machine Learning},
  journal = {arXiv preprint arXiv:1706.03461},
  year    = {2017},
}

@article{nie2017learning,
  author  = {Nie, Xinkun and Wager, Stefan},
  title   = {Learning Objectives for Treatment Effect Estimation},
  journal = {arXiv preprint arXiv:1712.04912},
  year    = {2017},
}

@article{chen2012estimation,
  author    = {Chen, Xiaohong and Pouzo, Demian},
  title     = {Estimation of nonparametric conditional moment models with possibly nonsmooth generalized residuals},
  journal   = {Econometrica},
  volume    = {80},
  number    = {1},
  pages     = {277--321},
  year      = {2012},
  publisher = {Wiley Online Library},
}

@article{bertsimas2014predictive,
  author  = {Bertsimas, Dimitris and Kallus, Nathan},
  title   = {From predictive to prescriptive analytics},
  journal = {arXiv preprint arXiv:1402.5481},
  year    = {2014},
}

@article{kallus2017balanced,
  author  = {Kallus, Nathan},
  title   = {Balanced Policy Evaluation and Learning},
  journal = {arXiv preprint arXiv:1705.07384},
  year    = {2017},
}

@article{van2006cross,
  author    = {van der Laan, Mark J and Dudoit, Sandrine and van der Vaart, Aad W},
  title     = {The cross-validated adaptive epsilon-net estimator},
  journal   = {Statistics \& Decisions},
  volume    = {24},
  number    = {3},
  pages     = {373--395},
  year      = {2006},
  publisher = {Oldenbourg Wissenschaftsverlag},
}

@book{vapnik2000nature,
  author    = {Vapnik, Vladimir},
  title     = {The nature of statistical learning theory},
  publisher = {Springer Information Science and Statistics},
  year      = {2000},
}

@article{pakes1989simulation,
  author    = {Pakes, Ariel and Pollard, David},
  title     = {Simulation and the asymptotics of optimization estimators},
  journal   = {Econometrica},
  volume    = {57},
  number    = {5},
  pages     = {1027--1057},
  year      = {1989},
  publisher = {JSTOR},
}

@article{ai2007estimation,
  author    = {Ai, Chunrong and Chen, Xiaohong},
  title     = {Estimation of possibly misspecified semiparametric conditional moment restriction models with different conditioning variables},
  journal   = {Journal of Econometrics},
  volume    = {141},
  number    = {1},
  pages     = {5--43},
  year      = {2007},
  publisher = {Elsevier},
}

@article{aronow2013beyond,
  author    = {Aronow, Peter M and Carnegie, Allison},
  title     = {Beyond {LATE}: Estimation of the average treatment effect with an instrumental variable},
  journal   = {Political Analysis},
  volume    = {21},
  number    = {4},
  pages     = {492--506},
  year      = {2013},
  publisher = {Cambridge University Press},
}

@article{chernozhukov2016locally,
  author  = {Chernozhukov, Victor and Escanciano, Juan Carlos and Ichimura, Hidehiko and Newey, Whitney K and Robins, James M},
  title   = {Locally robust semiparametric estimation},
  journal = {arXiv preprint arXiv:1608.00033},
  year    = {2018},
}

@article{powell1989semiparametric,
  author    = {Powell, James L and Stock, James H and Stoker, Thomas M},
  title     = {Semiparametric estimation of index coefficients},
  journal   = {Econometrica},
  volume    = {57},
  number    = {6},
  pages     = {1403--1430},
  year      = {1989},
  publisher = {JSTOR},
}

@article{imbens1994late,
  author  = {Imbens, Guido W. and Angrist, Joshua D.},
  title   = {Identification and Estimation of Local Average Treatment Effects},
  journal = {Econometrica},
  volume  = {62},
  number  = {2},
  pages   = {467--475},
  year    = {1994},
}



@article{chen2016best,
  author  = {Chen, Le-Yu and Lee, Sokbae},
  title   = {Best Subset Binary Prediction},
  journal = {arXiv preprint arXiv:1610.02738},
  year    = {2016},
}

@article{greenshtein2006best,
  author    = {Greenshtein, Eitan},
  title     = {Best subset selection, persistence in high-dimensional statistical learning and optimization under {$l_1$} constraint},
  journal   = {The Annals of Statistics},
  volume    = {34},
  number    = {5},
  pages     = {2367--2386},
  year      = {2006},
  publisher = {Institute of Mathematical Statistics},
}

@article{efron1983estimating,
  author    = {Efron, Bradley},
  title     = {Estimating the error rate of a prediction rule: Improvement on cross-validation},
  journal   = {Journal of the American Statistical Association},
  volume    = {78},
  number    = {382},
  pages     = {316--331},
  year      = {1983},
  publisher = {Taylor \& Francis Group},
}

@article{donoho1994ideal,
  author    = {Donoho, David L and Johnstone, Iain M},
  title     = {Ideal spatial adaptation by wavelet shrinkage},
  journal   = {Biometrika},
  volume    = {81},
  number    = {3},
  pages     = {425--455},
  year      = {1994},
  publisher = {Oxford University Press},
}

@article{cai2005adaptive,
  author    = {Cai, T Tony and Low, Mark G},
  title     = {On adaptive estimation of linear functionals},
  journal   = {The Annals of Statistics},
  volume    = {33},
  number    = {5},
  pages     = {2311--2343},
  year      = {2005},
  publisher = {Institute of Mathematical Statistics},
}

@unpublished{armstrong2016optimal,
  author = {Armstrong, Timothy B and Koles{\'a}r, Michal},
  title  = {Optimal inference in a class of regression models},
  note   = {Working paper},
  year   = {2016},
}

@article{lepskii1991problem,
  author    = {Lepskii, O. V.},
  title     = {On a problem of adaptive estimation in {G}aussian white noise},
  journal   = {Theory of Probability \& Its Applications},
  volume    = {35},
  number    = {3},
  pages     = {454--466},
  year      = {1991},
  publisher = {SIAM},
}

@article{birge2001gaussian,
  author    = {Birg{\'e}, Lucien and Massart, Pascal},
  title     = {Gaussian model selection},
  journal   = {Journal of the European Mathematical Society},
  volume    = {3},
  number    = {3},
  pages     = {203--268},
  year      = {2001},
  publisher = {Springer},
}

@unpublished{armstrong2015inference,
  author = {Armstrong, Timothy B and Shen, Shu},
  title  = {Inference on Optimal Treatment Assignments},
  note   = {Working paper},
  year   = {2015},
}

@article{kasy2016partial,
  author    = {Kasy, Maximilian},
  title     = {Partial identification, distributional preferences, and the welfare ranking of policies},
  journal   = {Review of Economics and Statistics},
  volume    = {98},
  number    = {1},
  pages     = {111--131},
  year      = {2016},
  publisher = {MIT Press},
}

@article{tetenov2012statistical,
  author    = {Tetenov, Aleksey},
  title     = {Statistical treatment choice based on asymmetric minimax regret criteria},
  journal   = {Journal of Econometrics},
  volume    = {166},
  number    = {1},
  pages     = {157--165},
  year      = {2012},
  publisher = {Elsevier},
}

@article{hirano2016panel,
  author    = {Hirano, Keisuke and Porter, Jack R},
  title     = {Panel Asymptotics and Statistical Decision Theory},
  journal   = {The Japanese Economic Review},
  volume    = {67},
  number    = {1},
  pages     = {33--49},
  year      = {2016},
  publisher = {Wiley Online Library},
}

@techreport{kitagawa2017equality,
  author      = {Kitagawa, Toru and Tetenov, Aleksey},
  title       = {Equality-minded treatment choice},
  institution = {Centre for Microdata Methods and Practice, Institute for Fiscal Studies},
  year        = {2017},
}

@incollection{chamberlain2011bayesian,
  author    = {Chamberlain, Gary},
  title     = {Bayesian aspects of treatment choice},
  booktitle = {The Oxford Handbook of Bayesian Econometrics},
  editor    = {Geweke, John and Koop, Gary and van Dijk, Herman},
  publisher = {Oxford University Press},
  year      = {2011},
}

@techreport{kleinberg2017human,
  author      = {Kleinberg, Jon and Lakkaraju, Himabindu and Leskovec, Jure and Ludwig, Jens and Mullainathan, Sendhil},
  title       = {Human decisions and machine predictions},
  institution = {National Bureau of Economic Research},
  year        = {2017},
}

@article{manski2007minimax,
  author    = {Manski, Charles F},
  title     = {Minimax-regret treatment choice with missing outcome data},
  journal   = {Journal of Econometrics},
  volume    = {139},
  number    = {1},
  pages     = {105--115},
  year      = {2007},
  publisher = {Elsevier},
}

@book{manski2009identification,
  author    = {Manski, Charles F},
  title     = {Identification for Prediction and Decision},
  publisher = {Harvard University Press},
  year      = {2009},
}

@book{lecam1986asymptotic,
  author    = {Le Cam, Lucien M},
  title     = {Asymptotic Methods in Statistical Decision Theory},
  publisher = {Springer-Verlag New York, Inc.},
  year      = {1986},
}

@article{lai1985asymptotically,
  author    = {Lai, Tze Leung and Robbins, Herbert},
  title     = {Asymptotically efficient adaptive allocation rules},
  journal   = {Advances in Applied Mathematics},
  volume    = {6},
  number    = {1},
  pages     = {4--22},
  year      = {1985},
  publisher = {Elsevier},
}

@article{luedtke2017faster,
  author  = {Luedtke, Alexander and Chambaz, Antoine},
  title   = {Faster Rates for Policy Learning},
  journal = {arXiv preprint arXiv:1704.06431},
  year    = {2017},
}

@article{stoye2012minimax,
  author    = {Stoye, J{\"o}rg},
  title     = {Minimax regret treatment choice with covariates or with limited validity of experiments},
  journal   = {Journal of Econometrics},
  volume    = {166},
  number    = {1},
  pages     = {138--156},
  year      = {2012},
  publisher = {Elsevier},
}

@article{stoye2009minimax,
  author    = {Stoye, J{\"o}rg},
  title     = {Minimax regret treatment choice with finite samples},
  journal   = {Journal of Econometrics},
  volume    = {151},
  number    = {1},
  pages     = {70--81},
  year      = {2009},
  publisher = {Elsevier},
}

@book{wald1950statistical,
  author    = {Wald, Abraham},
  title     = {Statistical Decision Functions},
  publisher = {Wiley},
  year      = {1950},
}

@article{savage1951theory,
  author    = {Savage, Leonard J},
  title     = {The theory of statistical decision},
  journal   = {Journal of the American Statistical Association},
  volume    = {46},
  number    = {253},
  pages     = {55--67},
  year      = {1951},
  publisher = {Taylor \& Francis},
}

@incollection{robins2008higher,
  author    = {Robins, James and Li, Lingling and Tchetgen, Eric and van der Vaart, Aad},
  title     = {Higher order influence functions and minimax estimation of nonlinear functionals},
  booktitle = {Probability and Statistics: Essays in Honor of David A. Freedman},
  pages     = {335--421},
  year      = {2008},
  publisher = {Institute of Mathematical Statistics},
}

@article{robins2017minimax,
  author  = {Robins, J and Li, Lingling and Mukherjee, Rajarshi and Tchetgen, E and van der Vaart, Aad},
  title   = {Minimax estimation of a functional on a structured high dimensional model},
  journal = {The Annals of Statistics},
  volume  = {45},
  number  = {5},
  pages   = {1951--1987},
  year    = {2017},
}

@article{mbakop2016model,
  author  = {Mbakop, Eric and Tabord-Meehan, Max},
  title   = {Model Selection for Treatment Choice: Penalized Welfare Maximization},
  journal = {arXiv preprint arXiv:1609.03167},
  year    = {2016},
}

@inproceedings{rakhlin2016efficient,
  author    = {Rakhlin, Alexander and Sridharan, Karthik},
  title     = {{BISTRO}: An efficient relaxation-based method for contextual bandits},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2016},
}

@article{perchet2013multiarmed,
  author  = {Perchet, Vianney and Rigollet, Philippe},
  title   = {The multi-armed bandit problem with covariates},
  journal = {The Annals of Statistics},
  volume  = {41},
  number  = {2},
  pages   = {693--721},
  year    = {2013},
}

@article{graham2012inverse,
  author    = {Graham, Bryan S and Pinto, Cristine Campos De Xavier and Egel, Daniel},
  title     = {Inverse probability tilting for moment condition models with missing data},
  journal   = {The Review of Economic Studies},
  volume    = {79},
  number    = {3},
  pages     = {1053--1079},
  year      = {2012},
  publisher = {Oxford University Press},
}

@article{athey2018generalized,
  author    = {Athey, Susan and Tibshirani, Julie and Wager, Stefan},
  title     = {Generalized random forests},
  journal   = {The Annals of Statistics},
  volume    = {47},
  number    = {2},
  pages     = {1148--1178},
  year      = {2019},
  publisher = {Institute of Mathematical Statistics},
}

@article{grubinger2014evtree,
  author  = {Grubinger, Thomas and Zeileis, Achim and Pfeiffer, Karl-Peter},
  title   = {{evtree}: Evolutionary learning of globally optimal classification and regression trees in {R}},
  journal = {Journal of Statistical Software},
  volume  = {61},
  number  = {1},
  pages   = {1--29},
  year    = {2014},
}

@article{cortes1995support,
  author    = {Cortes, Corinna and Vapnik, Vladimir},
  title     = {Support-vector networks},
  journal   = {Machine Learning},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  year      = {1995},
  publisher = {Springer},
}

@article{schick1986asymptotically,
  author    = {Schick, Anton},
  title     = {On asymptotically efficient estimation in semiparametric models},
  journal   = {The Annals of Statistics},
  volume    = {14},
  number    = {3},
  pages     = {1139--1151},
  year      = {1986},
  publisher = {JSTOR},
}

@article{auer2002nonstochastic,
  author    = {Auer, Peter and Cesa-Bianchi, Nicol{\`o} and Freund, Yoav and Schapire, Robert E},
  title     = {The nonstochastic multiarmed bandit problem},
  journal   = {SIAM Journal on Computing},
  volume    = {32},
  number    = {1},
  pages     = {48--77},
  year      = {2002},
  publisher = {SIAM},
}

@article{goldenshluger2013linear,
  author    = {Goldenshluger, Alexander and Zeevi, Assaf},
  title     = {A linear response bandit problem},
  journal   = {Stochastic Systems},
  volume    = {3},
  number    = {1},
  pages     = {230--261},
  year      = {2013},
  publisher = {INFORMS Applied Probability Society},
}

@article{robbins1952some,
  author    = {Robbins, Herbert},
  title     = {Some aspects of the sequential design of experiments},
  journal   = {Bulletin of the American Mathematical Society},
  volume    = {58},
  number    = {5},
  pages     = {527--535},
  year      = {1952},
  publisher = {American Mathematical Society},
}

@article{lai1987adaptive,
  author    = {Lai, Tze Leung},
  title     = {Adaptive treatment allocation and the multi-armed bandit problem},
  journal   = {The Annals of Statistics},
  volume    = {15},
  number    = {3},
  pages     = {1091--1114},
  year      = {1987},
  publisher = {JSTOR},
}

@unpublished{bastani2015online,
  author = {Bastani, Hamsa and Bayati, Mohsen},
  title  = {Online decision-making with high-dimensional covariates},
  note   = {Working paper},
  year   = {2015},
}


@article{feraud2015decision,
  author = {Rapha{\"{e}}l F{\'{e}}raud and Robin Allesiardo and Tanguy Urvoy and Fabrice Cl{\'{e}}rot},
  title = {Decision Tree Algorithms for the Contextual Bandit Problem},
  journal = {CoRR},
  volume = {abs/1504.06952},
  year = {2015},
  url = {http://arxiv.org/abs/1504.06952},
  timestamp = {Thu, 14 Jul 2016 17:28:19 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/FeraudAUC15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}

@article{zhang2012estimating,
  author    = {Zhang, Baqun and Tsiatis, Anastasios A and Davidian, Marie and Zhang, Min and Laber, Eric},
  title     = {Estimating optimal treatment regimes from a classification perspective},
  journal   = {Stat},
  volume    = {1},
  number    = {1},
  pages     = {103--114},
  year      = {2012},
  publisher = {Wiley Online Library},
}

@article{qian2011performance,
  author  = {Qian, Min and Murphy, Susan A},
  title   = {Performance guarantees for individualized treatment rules},
  journal = {The Annals of Statistics},
  volume  = {39},
  number  = {2},
  pages   = {1180--1210},
  year    = {2011},
}

@article{haussler1995sphere,
  author    = {Haussler, David},
  title     = {Sphere packing numbers for subsets of the {B}oolean n-cube with bounded {V}apnik-{C}hervonenkis dimension},
  journal   = {Journal of Combinatorial Theory, Series A},
  volume    = {69},
  number    = {2},
  pages     = {217--232},
  year      = {1995},
  publisher = {Elsevier},
}

@article{gine2006concentration,
  author    = {Gin{\'e}, Evarist and Koltchinskii, Vladimir},
  title     = {Concentration inequalities and asymptotic results for ratio type empirical processes},
  journal   = {The Annals of Probability},
  volume    = {34},
  number    = {3},
  pages     = {1143--1216},
  year      = {2006},
  publisher = {Institute of Mathematical Statistics},
}

@article{koltchinskii2006local,
  author    = {Koltchinskii, Vladimir},
  title     = {Local {R}ademacher complexities and oracle inequalities in risk minimization},
  journal   = {The Annals of Statistics},
  volume    = {34},
  number    = {6},
  pages     = {2593--2656},
  year      = {2006},
  publisher = {Institute of Mathematical Statistics},
}

@article{bartlett2005local,
  author    = {Bartlett, Peter L and Bousquet, Olivier and Mendelson, Shahar},
  title     = {Local {R}ademacher complexities},
  journal   = {The Annals of Statistics},
  volume    = {33},
  number    = {4},
  pages     = {1497--1537},
  year      = {2005},
  publisher = {JSTOR},
}

@article{bartlett2006empirical,
  author    = {Bartlett, Peter L and Mendelson, Shahar},
  title     = {Empirical minimization},
  journal   = {Probability Theory and Related Fields},
  volume    = {135},
  number    = {3},
  pages     = {311--334},
  year      = {2006},
  publisher = {Springer},
}

@book{boucheron2013concentration,
  author    = {Boucheron, St{\'e}phane and Lugosi, G{\'a}bor and Massart, Pascal},
  title     = {Concentration inequalities: A nonasymptotic theory of independence},
  publisher = {Oxford University Press},
  year      = {2013},
}

@article{dudley1967sizes,
  author    = {Dudley, Richard M},
  title     = {The sizes of compact subsets of {H}ilbert space and continuity of {G}aussian processes},
  journal   = {Journal of Functional Analysis},
  volume    = {1},
  number    = {3},
  pages     = {290--330},
  year      = {1967},
  publisher = {Elsevier},
}

@article{talagrand1996new,
  author    = {Talagrand, Michel},
  title     = {New concentration inequalities in product spaces},
  journal   = {Inventiones Mathematicae},
  volume    = {126},
  number    = {3},
  pages     = {505--563},
  year      = {1996},
  publisher = {Springer},
}

@article{bousquet2002bennett,
  author    = {Bousquet, Olivier},
  title     = {A {B}ennett concentration inequality and its application to suprema of empirical processes},
  journal   = {Comptes Rendus Mathematique},
  volume    = {334},
  number    = {6},
  pages     = {495--500},
  year      = {2002},
  publisher = {Elsevier},
}

@article{bartlett2002rademacher,
  author  = {Bartlett, Peter L and Mendelson, Shahar},
  title   = {Rademacher and {G}aussian complexities: Risk bounds and structural results},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {463--482},
  year    = {2002},
}

@article{vapnik1971uniform,
  author    = {Vapnik, V. N. and Chervonenkis, A. Ya.},
  title     = {On the Uniform Convergence of Relative Frequencies of Events to Their Probabilities},
  journal   = {Theory of Probability \& Its Applications},
  volume    = {16},
  number    = {2},
  pages     = {264--280},
  year      = {1971},
  publisher = {Society for Industrial and Applied Mathematics},
}

@article{van2010collaborative,
  author  = {van der Laan, Mark J and Gruber, Susan},
  title   = {Collaborative double robust targeted maximum likelihood estimation},
  journal = {The International Journal of Biostatistics},
  volume  = {6},
  number  = {1},
  year    = {2010},
}

@article{kitagawa2015should,
  author    = {Kitagawa, Toru and Tetenov, Aleksey},
  title     = {Who should be treated? {E}mpirical welfare maximization methods for treatment choice},
  journal   = {Econometrica},
  volume    = {86},
  number    = {2},
  pages     = {591--616},
  year      = {2018},
  publisher = {Wiley Online Library},
}

@article{chen2016personalized,
  author    = {Chen, Guanhua and Zeng, Donglin and Kosorok, Michael R},
  title     = {Personalized Dose Finding Using Outcome Weighted Learning},
  journal   = {Journal of the American Statistical Association},
  volume    = {111},
  number    = {516},
  pages     = {1509--1521},
  year      = {2016},
  publisher = {Taylor \& Francis},
}

@article{zhou2015residual,
  author    = {Zhou, Xin and Mayer-Hamblett, Nicole and Khan, Umer and Kosorok, Michael R},
  title     = {Residual weighted learning for estimating individualized treatment rules},
  journal   = {Journal of the American Statistical Association},
  volume    = {112},
  number    = {517},
  pages     = {169--187},
  year      = {2017},
  publisher = {Taylor \& Francis},
}

@article{zhao2012estimating,
  author    = {Zhao, Yingqi and Zeng, Donglin and Rush, A John and Kosorok, Michael R},
  title     = {Estimating individualized treatment rules using outcome weighted learning},
  journal   = {Journal of the American Statistical Association},
  volume    = {107},
  number    = {499},
  pages     = {1106--1118},
  year      = {2012},
  publisher = {Taylor \& Francis},
}

@inproceedings{maurer2009empirical,
  author    = {Maurer, Andreas and Pontil, Massimiliano},
  title     = {Empirical {B}ernstein bounds and sample variance penalization},
  booktitle = {Conference on Learning Theory},
  year      = {2009},
}

@inproceedings{cortes2010learning,
  author    = {Cortes, Corinna and Mansour, Yishay and Mohri, Mehryar},
  title     = {Learning bounds for importance weighting},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {442--450},
  year      = {2010},
}

@article{bottou2013counterfactual,
  author  = {Bottou, L{\'e}on and Peters, Jonas and Qui{\~n}onero-Candela, Joaquin and Charles, Denis Xavier and Chickering, Max and Portugaly, Elon and Ray, Dipankar and Simard, Patrice Y and Snelson, Ed},
  title   = {Counterfactual reasoning and learning systems: The example of computational advertising},
  journal = {Journal of Machine Learning Research},
  volume  = {14},
  number  = {1},
  pages   = {3207--3260},
  year    = {2013},
}

@inproceedings{kallus2017recursive,
  author    = {Kallus, Nathan},
  title     = {Recursive Partitioning for Personalization using Observational Data},
  booktitle = {International Conference on Machine Learning},
  pages     = {1789--1798},
  year      = {2017},
}

@article{swaminathan2015batch,
  author  = {Swaminathan, Adith and Joachims, Thorsten},
  title   = {Batch Learning from Logged Bandit Feedback through Counterfactual Risk Minimization},
  journal = {Journal of Machine Learning Research},
  volume  = {16},
  pages   = {1731--1755},
  year    = {2015},
}

@article{athey2016approximate,
  author    = {Athey, Susan and Imbens, Guido W and Wager, Stefan},
  title     = {Approximate residual balancing: debiased inference of average treatment effects in high dimensions},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {80},
  number    = {4},
  pages     = {597--623},
  year      = {2018},
  publisher = {Wiley Online Library},
}

@article{neyman1923applications,
  author  = {Neyman, Jerzy},
  title   = {Sur les applications de la th{\'e}orie des probabilit{\'e}s aux exp{\'e}riences agricoles: Essai des principes},
  journal = {Roczniki Nauk Rolniczych},
  volume  = {10},
  pages   = {1--51},
  year    = {1923},
}

@article{van2007super,
  author    = {van der Laan, Mark J and Polley, Eric C and Hubbard, Alan E},
  title     = {Super learner},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  volume    = {6},
  number    = {1},
  year      = {2007},
  publisher = {De Gruyter},
}


@article{ning2014general,
  author  = {Ning, Yang and Liu, Han},
  title   = {A general theory of hypothesis tests and confidence regions for sparse high dimensional models},
  journal = {arXiv preprint arXiv:1412.8765},
  year    = {2014},
}

@book{van2011targeted,
  author    = {van der Laan, Mark J and Rose, Sherri},
  title     = {Targeted Learning: Causal Inference for Observational and Experimental Data},
  publisher = {Springer Science \& Business Media},
  year      = {2011},
}

@article{robins1994estimation,
  author    = {Robins, James M and Rotnitzky, Andrea and Zhao, Lue Ping},
  title     = {Estimation of regression coefficients when some regressors are not always observed},
  journal   = {Journal of the American Statistical Association},
  volume    = {89},
  number    = {427},
  pages     = {846--866},
  year      = {1994},
  publisher = {Taylor \& Francis Group},
}

@article{cassel1976some,
  author    = {Cassel, Claes M and S{\"a}rndal, Carl E and Wretman, Jan H},
  title     = {Some results on generalized difference estimation and generalized regression estimation for finite populations},
  journal   = {Biometrika},
  volume    = {63},
  number    = {3},
  pages     = {615--620},
  year      = {1976},
  publisher = {Biometrika Trust},
}

@article{wager2016high,
  author  = {Wager, Stefan and Du, Wenfei and Taylor, Jonathan and Tibshirani, Robert J.},
  title   = {High-dimensional regression adjustments in randomized experiments},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {113},
  number  = {45},
  pages   = {12673--12678},
  year    = {2016},
}

@article{hotz2006evaluating,
  author  = {Hotz, V Joseph and Imbens, Guido W and Klerman, Jacob A},
  title   = {Evaluating the Differential Effects of Alternative Welfare-to-Work Training Components: A Reanalysis of the California {GAIN} Program},
  journal = {Journal of Labor Economics},
  volume  = {24},
  number  = {3},
  year    = {2006},
}


@article{su2016slope,
  author    = {Su, Weijie and Cand{\`e}s, Emmanuel},
  title     = {{SLOPE} is adaptive to unknown sparsity and asymptotically minimax},
  journal   = {The Annals of Statistics},
  volume    = {44},
  number    = {3},
  pages     = {1038--1068},
  year      = {2016},
  publisher = {Institute of Mathematical Statistics},
}

@article{hanson1971bound,
  author    = {Hanson, David Lee and Wright, Farroll Tim},
  title     = {A bound on tail probabilities for quadratic forms in independent random variables},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {42},
  number    = {3},
  pages     = {1079--1083},
  year      = {1971},
  publisher = {JSTOR},
}

@article{chernozhukov2016double,
  author    = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney and Robins, James},
  title     = {Double/debiased machine learning for treatment and structural parameters},
  journal   = {The Econometrics Journal},
  volume    = {21},
  number    = {1},
  pages     = {C1--C68},
  year      = {2018},
  publisher = {Wiley Online Library},
}

@article{rudelson2013reconstruction,
  author    = {Rudelson, Mark and Zhou, Shuheng},
  title     = {Reconstruction From Anisotropic Random Measurements},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {59},
  number    = {6},
  pages     = {3434--3447},
  year      = {2013},
  publisher = {IEEE Press},
}

@article{vershynin2012close,
  author    = {Vershynin, Roman},
  title     = {How close is the sample covariance matrix to the actual covariance matrix?},
  journal   = {Journal of Theoretical Probability},
  volume    = {25},
  number    = {3},
  pages     = {655--686},
  year      = {2012},
  publisher = {Springer},
}

@article{rudelson2013hanson,
  author  = {Rudelson, Mark and Vershynin, Roman},
  title   = {Hanson-{W}right inequality and sub-{G}aussian concentration},
  journal = {Electronic Communications in Probability},
  volume  = {18},
  number  = {82},
  pages   = {1--9},
  year    = {2013},
}

@article{newey1994asymptotic,
  author    = {Newey, Whitney K},
  title     = {The asymptotic variance of semiparametric estimators},
  journal   = {Econometrica},
  volume    = {62},
  number    = {6},
  pages     = {1349--1382},
  year      = {1994},
  publisher = {JSTOR},
}

@techreport{rothe2013semiparametric,
  author      = {Rothe, Christoph and Firpo, Sergio},
  title       = {Semiparametric estimation and inference using doubly robust moment conditions},
  type        = {{IZA} Discussion Paper},
  institution = {Institute for the Study of Labor ({IZA})},
  year        = {2013},
}

@article{chen2008semiparametric,
  author    = {Chen, Xiaohong and Hong, Han and Tarozzi, Alessandro},
  title     = {Semiparametric Efficiency in {GMM} Models with Auxiliary Data},
  journal   = {The Annals of Statistics},
  volume    = {36},
  number    = {2},
  pages     = {808--843},
  year      = {2008},
  publisher = {JSTOR},
}

@book{van2003unified,
  author    = {van der Laan, Mark J and Robins, James M},
  title     = {Unified Methods for Censored Longitudinal Data and Causality},
  publisher = {Springer Science \& Business Media},
  year      = {2003},
}

@article{lalonde1986evaluating,
  author    = {LaLonde, Robert J},
  title     = {Evaluating the econometric evaluations of training programs with experimental data},
  journal   = {The American Economic Review},
  volume    = {76},
  number    = {4},
  pages     = {604--620},
  year      = {1986},
  publisher = {JSTOR},
}

@article{javanmard2015biasing,
  author  = {Javanmard, Adel and Montanari, Andrea},
  title   = {De-biasing the Lasso: Optimal Sample Size for {G}aussian Designs},
  journal = {arXiv preprint arXiv:1508.02757},
  year    = {2015},
}

@article{collier2015minimax,
  author  = {Collier, Olivier and Comminges, La{\"e}titia and Tsybakov, Alexandre B},
  title   = {Minimax estimation of linear and quadratic functionals on sparsity classes},
  journal = {arXiv preprint arXiv:1502.00665},
  year    = {2015},
}

@article{heckman1998matching,
  author    = {Heckman, James J and Ichimura, Hidehiko and Todd, Petra},
  title     = {Matching as an econometric evaluation estimator},
  journal   = {The Review of Economic Studies},
  volume    = {65},
  number    = {2},
  pages     = {261--294},
  year      = {1998},
  publisher = {Oxford University Press},
}

@book{rosenbaum2002observational,
  author    = {Rosenbaum, Paul R},
  title     = {Observational Studies},
  publisher = {Springer},
  year      = {2002},
}

@article{littlewu,
  author  = {Little, Roderick and Wu, Mei-Miau},
  title   = {Models for contingency tables with known margins when target and sampled populations differ},
  journal = {Journal of the American Statistical Association},
  volume  = {86},
  number  = {413},
  pages   = {87--95},
  year    = {1991},
}

@article{hirr,
  author  = {Hirano, Keisuke and Imbens, Guido and Ridder, Geert and Rubin, Donald},
  title   = {Combining Panels with Attrition and Refreshment Samples},
  journal = {Econometrica},
  volume  = {69},
  number  = {6},
  pages   = {1645--1659},
  year    = {2001},
}

@incollection{zheng2011cross,
  author    = {Zheng, Wenjing and van der Laan, Mark J},
  title     = {Cross-validated targeted minimum-loss-based estimation},
  booktitle = {Targeted Learning},
  pages     = {459--474},
  year      = {2011},
  publisher = {Springer},
}




@article{graham1,
  author  = {Graham, Bryan and Pinto, Cristine and Egel, Daniel},
  title   = {Inverse probability tilting for moment condition models with missing data},
  journal = {The Review of Economic Studies},
  volume  = {79},
  number  = {3},
  pages   = {1053--1079},
  year    = {2012},
}

@article{graham2,
  author  = {Graham, Bryan and Pinto, Christine and Egel, Daniel},
  title   = {Efficient estimation of data combination models by the method of auxiliary-to-study tilting ({AST})},
  journal = {Journal of Business and Economic Statistics},
  volume  = {34},
  number  = {2},
  pages   = {288--301},
  year    = {2016},
}


@article{abadie2003semiparametric,
  author    = {Abadie, Alberto},
  title     = {Semiparametric instrumental variable estimation of treatment response models},
  journal   = {Journal of Econometrics},
  volume    = {113},
  number    = {2},
  pages     = {231--263},
  year      = {2003},
  publisher = {Elsevier},
}

@article{abadie42synth,
  author    = {Abadie, Alberto and Diamond, Alexis and Hainmueller, Jens},
  title     = {Synth: An {R} Package for Synthetic Control Methods in Comparative Case Studies},
  journal   = {Journal of Statistical Software},
  volume    = {42},
  number    = {13},
  pages     = {1--17},
  year      = {2011},
  publisher = {American Statistical Association},
}
@article{abadie2003economic,
  author    = {Abadie, Alberto and Gardeazabal, Javier},
  title     = {The economic costs of conflict: A case study of the {Basque Country}},
  journal   = {American Economic Review},
  volume    = {93},
  number    = {1},
  pages     = {113--132},
  year      = {2003},
  publisher = {JSTOR},
}
@article{abadie2010synthetic,
  author  = {Abadie, Alberto and Diamond, Alexis and Hainmueller, Jens},
  title   = {Synthetic control methods for comparative case studies: Estimating the effect of {California}'s tobacco control program},
  journal = {Journal of the American Statistical Association},
  volume  = {105},
  number  = {490},
  pages   = {493--505},
  year    = {2010},
}
@article{abadie2014comparative,
  author    = {Abadie, Alberto and Diamond, Alexis and Hainmueller, Jens},
  title     = {Comparative politics and the synthetic control method},
  journal   = {American Journal of Political Science},
  publisher = {Wiley Online Library},
  year      = {2014},
}


@article{farrell2015robust,
  author    = {Farrell, Max H},
  title     = {Robust inference on average treatment effects with possibly more covariates than observations},
  journal   = {Journal of Econometrics},
  volume    = {189},
  number    = {1},
  pages     = {1--23},
  year      = {2015},
  publisher = {Elsevier},
}

@article{newey2004higher,
  author    = {Newey, Whitney K and Smith, Richard J},
  title     = {Higher order properties of {GMM} and generalized empirical likelihood estimators},
  journal   = {Econometrica},
  volume    = {72},
  number    = {1},
  pages     = {219--255},
  year      = {2004},
  publisher = {Wiley Online Library},
}

@book{bickel,
  author    = {Bickel, Peter and Klaassen, Chris and Ritov, Yakov and Wellner, Jon},
  title     = {Efficient and Adaptive Estimation for Semiparametric Models},
  publisher = {Springer-Verlag},
  year      = {1998},
}


@article{abadie,
  author    = {Abadie, Alberto and Imbens, Guido W},
  title     = {Large sample properties of matching estimators for average treatment effects},
  journal   = {Econometrica},
  volume    = {74},
  number    = {1},
  pages     = {235--267},
  year      = {2006},
  publisher = {Wiley Online Library},
}

@article{deville1992calibration,
  author    = {Deville, Jean-Claude and S{\"a}rndal, Carl-Erik},
  title     = {Calibration estimators in survey sampling},
  journal   = {JASA},
  volume    = {87},
  number    = {418},
  pages     = {376--382},
  year      = {1992},
  publisher = {Taylor \& Francis Group},
}

@article{zhao2016covariate,
  author  = {Zhao, Qingyuan},
  title   = {Covariate Balancing Propensity Score by Tailored Loss Functions},
  journal = {arXiv preprint arXiv:1601.05890},
  year    = {2016},
}

@article{imai2014covariate,
  author    = {Imai, Kosuke and Ratkovic, Marc},
  title     = {Covariate balancing propensity score},
  journal   = {JRSS-B},
  volume    = {76},
  number    = {1},
  pages     = {243--263},
  year      = {2014},
  publisher = {Wiley Online Library},
}

% journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
@article{chan2015globally,
  author    = {Chan, Kwun Chuen Gary and Yam, Sheung Chi Phillip and Zhang, Zheng},
  title     = {Globally efficient non-parametric inference of average treatment effects by empirical balancing calibration weighting},
  journal   = {JRSS-B},
  publisher = {Wiley Online Library},
  year      = {2015},
}



@article{imbensel,
  author  = {Imbens, Guido and Spady, Richard and Johnson, Phillip},
  title   = {Information Theoretic Approaches to Inference in Moment Condition Models},
  journal = {Econometrica},
  volume  = {66},
  number  = {2},
  pages   = {333--357},
  year    = {1998},
}

@article{belloni2011square,
  author    = {Belloni, Alexandre and Chernozhukov, Victor and Wang, Lie},
  title     = {Square-root lasso: pivotal recovery of sparse signals via conic programming},
  journal   = {Biometrika},
  volume    = {98},
  number    = {4},
  pages     = {791--806},
  year      = {2011},
  publisher = {Biometrika Trust},
}

@article{reid2016study,
  author  = {Reid, Stephen and Tibshirani, Robert and Friedman, Jerome},
  title   = {A study of error variance estimation in {Lasso} regression},
  journal = {Statistica Sinica},
  volume  = {26},
  pages   = {35--67},
  year    = {2016},
}

@article{sun2012scaled,
  author    = {Sun, Tingni and Zhang, Cun-Hui},
  title     = {Scaled sparse linear regression},
  journal   = {Biometrika},
  volume    = {99},
  number    = {4},
  pages     = {879--898},
  year      = {2012},
  publisher = {Biometrika Trust},
}

@article{fan2012variance,
  author    = {Fan, Jianqing and Guo, Shaojun and Hao, Ning},
  title     = {Variance estimation using refitted cross-validation in ultrahigh dimensional regression},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {74},
  number    = {1},
  pages     = {37--65},
  year      = {2012},
  publisher = {Wiley Online Library},
}

@article{dicker2014variance,
  author    = {Dicker, Lee H},
  title     = {Variance estimation in high-dimensional linear models},
  journal   = {Biometrika},
  volume    = {101},
  number    = {2},
  pages     = {269--284},
  year      = {2014},
  publisher = {Biometrika Trust},
}

@article{chatterjee2015prediction,
  author  = {Chatterjee, Sourav and Jafarov, Jafar},
  title   = {Prediction error of cross-validated {Lasso}},
  journal = {arXiv preprint arXiv:1502.06291},
  year    = {2015},
}

@article{hahn1998role,
  author    = {Hahn, Jinyong},
  title     = {On the role of the propensity score in efficient semiparametric estimation of average treatment effects},
  journal   = {Econometrica},
  volume    = {66},
  number    = {2},
  pages     = {315--331},
  year      = {1998},
  publisher = {JSTOR},
}

@article{bloniarz2015lasso,
  author  = {Bloniarz, Adam and Liu, Hanzhong and Zhang, Cun-Hui and Sekhon, Jasjeet and Yu, Bin},
  title   = {Lasso adjustments of treatment effect estimates in randomized experiments},
  journal = {arXiv preprint arXiv:1507.03652},
  year    = {2015},
}

@article{owen2007infinitely,
  author    = {Owen, Art B},
  title     = {Infinitely imbalanced logistic regression},
  journal   = {JMLR},
  volume    = {8},
  pages     = {761--773},
  year      = {2007},
  publisher = {JMLR. org},
}

@article{zou2005regularization,
  author    = {Zou, Hui and Hastie, Trevor},
  title     = {Regularization and variable selection via the elastic net},
  journal   = {JRSS-B},
  volume    = {67},
  number    = {2},
  pages     = {301--320},
  year      = {2005},
  publisher = {Wiley Online Library},
}


@article{van2014asymptotically,
  author    = {van de Geer, Sara and B{\"u}hlmann, Peter and Ritov, Ya'acov and Dezeure, Ruben},
  title     = {On asymptotically optimal confidence regions and tests for high-dimensional models},
  journal   = {The Annals of Statistics},
  volume    = {42},
  number    = {3},
  pages     = {1166--1202},
  year      = {2014},
  publisher = {Institute of Mathematical Statistics},
}

@article{van2009conditions,
  author    = {van de Geer, Sara A and B{\"u}hlmann, Peter},
  title     = {On the conditions used to prove oracle results for the {Lasso}},
  journal   = {Electronic Journal of Statistics},
  volume    = {3},
  pages     = {1360--1392},
  year      = {2009},
  publisher = {Institute of Mathematical Statistics},
}



@article{cai2015confidence,
  author  = {Cai, T Tony and Guo, Zijian},
  title   = {Confidence Intervals for High-Dimensional Linear Regression: Minimax Rates and Adaptivity},
  journal = {arXiv preprint arXiv:1506.05539},
  year    = {2015},
}

@article{meinshausen2009lasso,
  author    = {Meinshausen, Nicolai and Yu, Bin},
  title     = {Lasso-type recovery of sparse representations for high-dimensional data},
  journal   = {The Annals of Statistics},
  volume    = {37},
  number    = {1},
  pages     = {246--270},
  year      = {2009},
  publisher = {JSTOR},
}

@article{candes2007dantzig,
  author    = {Cand{\`e}s, Emmanuel and Tao, Terence},
  title     = {The {Dantzig} selector: Statistical estimation when {$p$} is much larger than {$n$}},
  journal   = {The Annals of Statistics},
  volume    = {35},
  number    = {6},
  pages     = {2313--2351},
  year      = {2007},
  publisher = {JSTOR},
}

@article{bickel2009simultaneous,
  author    = {Bickel, Peter J and Ritov, Ya'acov and Tsybakov, Alexandre B},
  title     = {Simultaneous analysis of {Lasso} and {Dantzig} selector},
  journal   = {The Annals of Statistics},
  volume    = {37},
  number    = {4},
  pages     = {1705--1732},
  year      = {2009},
  publisher = {JSTOR},
}

@article{negahban2012unified,
  author  = {Negahban, Sahand N and Ravikumar, Pradeep and Wainwright, Martin J and Yu, Bin},
  title   = {A Unified Framework for High-Dimensional Analysis of {$M$}-Estimators with Decomposable Regularizers},
  journal = {Statistical Science},
  volume  = {27},
  number  = {4},
  pages   = {538--557},
  year    = {2012},
}

@article{javanmard2014confidence,
  author    = {Javanmard, Adel and Montanari, Andrea},
  title     = {Confidence intervals and hypothesis testing for high-dimensional regression},
  journal   = {The Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {2869--2909},
  year      = {2014},
  publisher = {JMLR. org},
}

@article{zhang2014confidence,
  author    = {Zhang, Cun-Hui and Zhang, Stephanie S},
  title     = {Confidence intervals for low dimensional parameters in high dimensional linear models},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {76},
  number    = {1},
  pages     = {217--242},
  year      = {2014},
  publisher = {Wiley Online Library},
}

@article{belloni2013program,
  author  = {Belloni, Alexandre and Chernozhukov, Victor and Fern{\'a}ndez-Val, Ivan and Hansen, Christian},
  title   = {Program evaluation with high-dimensional data},
  journal = {Econometrica},
  volume  = {85},
  number  = {1},
  pages   = {233--298},
  year    = {2017},
}

@book{hastie2015statistical,
  author    = {Hastie, Trevor and Tibshirani, Robert and Wainwright, Martin},
  title     = {Statistical Learning with Sparsity: The Lasso and Generalizations},
  publisher = {CRC Press},
  year      = {2015},
}

@article{chen1998atomic,
  author    = {Chen, Scott Shaobing and Donoho, David L and Saunders, Michael A},
  title     = {Atomic decomposition by basis pursuit},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {20},
  number    = {1},
  pages     = {33--61},
  year      = {1998},
  publisher = {SIAM},
}



@article{hainmueller,
  author  = {Hainmueller, Jens},
  title   = {Entropy Balancing for Causal Effects: A Multivariate Reweighting Method to Produce Balanced Samples in Observational Studies},
  journal = {Political Analysis},
  volume  = {20},
  number  = {1},
  pages   = {25--46},
  year    = {2012},
}


@article{robins1,
  author  = {Robins, James and Rotnitzky, Andrea},
  title   = {Semiparametric Efficiency in Multivariate Regression Models with Missing Data},
  journal = {Journal of the American Statistical Association},
  volume  = {90},
  number  = {429},
  pages   = {122--129},
  year    = {1995},
}


@article{robins2,
  author  = {Robins, James and Rotnitzky, Andrea and Zhao, Lue Ping},
  title   = {Analysis of Semiparametric Regression Models for Repeated Outcomes in the Presence of Missing Data},
  journal = {Journal of the American Statistical Association},
  volume  = {90},
  number  = {429},
  pages   = {106--121},
  year    = {1995},
}



@article{schafer,
  author  = {Kang, Joseph and Schafer, Joseph},
  title   = {Demystifying Double Robustness: A Comparison of Alternative Strategies for Estimating a Population Mean from Incomplete Data},
  journal = {Statistical Science},
  volume  = {22},
  number  = {4},
  pages   = {523--539},
  year    = {2007},
}


@article{zubizarreta2015stable,
  author  = {Zubizarreta, Jos{\'e} R},
  title   = {Stable Weights that Balance Covariates for Estimation with Incomplete Outcome Data},
  journal = {Journal of the American Statistical Association},
  volume  = {110},
  number  = {511},
  pages   = {910--922},
  year    = {2015},
}

@article{friedman2010regularization,
  author  = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob},
  title   = {Regularization paths for generalized linear models via coordinate descent},
  journal = {Journal of Statistical Software},
  volume  = {33},
  number  = {1},
  pages   = {1--22},
  year    = {2010},
}


@article{hellerstein1999imposing,
  author  = {Hellerstein, Judith and Imbens, Guido},
  title   = {Imposing Moment Restrictions by Weighting},
  journal = {Review of Economics and Statistics},
  volume  = {81},
  number  = {1},
  pages   = {1--14},
  year    = {1999},
}

@article{tibshirani1996regression,
  author    = {Tibshirani, Robert},
  title     = {Regression shrinkage and selection via the lasso},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume    = {58},
  number    = {1},
  pages     = {267--288},
  year      = {1996},
  publisher = {JSTOR},
}

@article{belloni2014inference,
  author    = {Belloni, Alexandre and Chernozhukov, Victor and Hansen, Christian},
  title     = {Inference on treatment effects after selection among high-dimensional controls},
  journal   = {The Review of Economic Studies},
  volume    = {81},
  number    = {2},
  pages     = {608--650},
  year      = {2014},
  publisher = {Oxford University Press},
}

@article{berk2013valid,
  author    = {Berk, Richard and Brown, Lawrence and Buja, Andreas and Zhang, Kai and Zhao, Linda},
  title     = {Valid post-selection inference},
  journal   = {The Annals of Statistics},
  volume    = {41},
  number    = {2},
  pages     = {802--837},
  year      = {2013},
  publisher = {Institute of Mathematical Statistics},
}

@article{chernozhukov2015valid,
  author  = {Chernozhukov, Victor and Hansen, Christian and Spindler, Martin},
  title   = {Valid Post-Selection and Post-Regularization Inference: An Elementary, General Approach},
  journal = {Annual Review of Economics},
  volume  = {7},
  number  = {1},
  pages   = {649--688},
  year    = {2015},
}

  @manual{CRAN,
    author       = {{R Core Team}},
    title        = {R: A Language and Environment for Statistical Computing},
    organization = {R Foundation for Statistical Computing},
    address      = {Vienna, Austria},
    year         = {2019},
    url          = {https://www.R-project.org/},
  }


@article{taylor2015statistical,
  author    = {Taylor, Jonathan and Tibshirani, Robert J},
  title     = {Statistical learning and selective inference},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {112},
  number    = {25},
  pages     = {7629--7634},
  year      = {2015},
  publisher = {National Acad Sciences},
}

 @manual{rpart,
   author = {Terry Therneau and Beth Atkinson and Brian Ripley},
   title  = {rpart: Recursive Partitioning and Regression Trees},
   year   = {2014},
   note   = {R package version 4.1-8},
   url    = {http://CRAN.R-project.org/package=rpart},
 }

@manual{beygelzimer2013fnn,
  author = {Alina Beygelzimer and Sham Kakadet and John Langford and Sunil Arya and David Mount and Shengqiao Li},
  title  = {{FNN}: Fast Nearest Neighbor Search Algorithms and Applications},
  year   = {2013},
  note   = {{R} package version 1.1},
  url    = {http://CRAN.R-project.org/package=FNN},
}

@inproceedings{langford2011doubly,
  author    = {Dud{\'\i}k, Miroslav and Langford, John and Li, Lihong},
  title     = {Doubly Robust Policy Evaluation and Learning},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning},
  pages     = {1097--1104},
  year      = {2011},
}

@inproceedings{hsu2014taming,
  author    = {Agarwal, Alekh and Hsu, Daniel and Kale, Satyen and Langford, John and Li, Lihong and Schapire, Robert},
  title     = {Taming the Monster: A Fast and Simple Algorithm for Contextual Bandits},
  booktitle = {Proceedings of The 31st International Conference on Machine Learning},
  pages     = {1638--1646},
  year      = {2014},
}

@inproceedings{beygelzimer2009offset,
  author       = {Beygelzimer, Alina and Langford, John},
  title        = {The offset tree for learning with partial labels},
  booktitle    = {Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages        = {129--138},
  organization = {ACM},
  year         = {2009},
}

@article{bhattacharya2012inferring,
  author    = {Bhattacharya, Debopam and Dupas, Pascaline},
  title     = {Inferring welfare maximizing treatment assignment under budget constraints},
  journal   = {Journal of Econometrics},
  volume    = {167},
  number    = {1},
  pages     = {168--196},
  year      = {2012},
  publisher = {Elsevier},
}

@article{cook2004subgroup,
  author    = {Cook, David I and Gebski, Val J and Keech, Anthony C},
  title     = {Subgroup analysis in clinical trials},
  journal   = {Medical Journal of Australia},
  volume    = {180},
  number    = {6},
  pages     = {289--292},
  year      = {2004},
  publisher = {AUSTRALASIAN MEDICAL PUBLISHING COMPANY LTD},
}

@article{willke2012concepts,
  author    = {Willke, Richard J and Zheng, Zhiyuan and Subedi, Prasun and Althin, Rikard and Mullins, C Daniel},
  title     = {From concepts, theory, and evidence of heterogeneity of treatment effects to methodological approaches: a primer},
  journal   = {BMC Medical Research Methodology},
  volume    = {12},
  number    = {1},
  pages     = {185},
  year      = {2012},
  publisher = {BioMed Central Ltd},
}

@article{lee2009non,
  author    = {Lee, Myoung-jae},
  title     = {Non-parametric tests for distributional treatment effect for randomly censored responses},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {71},
  number    = {1},
  pages     = {243--264},
  year      = {2009},
  publisher = {Wiley Online Library},
}

@article{crump2008nonparametric,
  author    = {Crump, Richard K and Hotz, V Joseph and Imbens, Guido W and Mitnik, Oscar A},
  title     = {Nonparametric tests for treatment effect heterogeneity},
  journal   = {The Review of Economics and Statistics},
  volume    = {90},
  number    = {3},
  pages     = {389--405},
  year      = {2008},
  publisher = {MIT Press},
}



@article{crump,
  author    = {Crump, Richard K and Hotz, V Joseph and Imbens, Guido W and Mitnik, Oscar A},
  title     = {Dealing with limited overlap in estimation of average treatment effects},
  journal   = {Biometrika},
  volume    = {96},
  number    = {1},
  pages     = {187--199},
  year      = {2009},
  publisher = {Biometrika Trust},
}

@article{zeileis2008model,
  author    = {Zeileis, Achim and Hothorn, Torsten and Hornik, Kurt},
  title     = {Model-based recursive partitioning},
  journal   = {Journal of Computational and Graphical Statistics},
  volume    = {17},
  number    = {2},
  pages     = {492--514},
  year      = {2008},
  publisher = {Taylor \& Francis},
}

% Conference paper: the entry carries a booktitle and no journal, so it is typed as inproceedings.
@inproceedings{kallus2016learning,
  author    = {Kallus, Nathan},
  title     = {Recursive partitioning for personalization using observational data},
  booktitle = {International Conference on Machine Learning},
  pages     = {1789--1798},
  year      = {2017},
}

@article{taddy2014heterogeneous,
  author  = {Taddy, Matt and Gardner, Matt and Chen, Liyun and Draper, David},
  title   = {Heterogeneous Treatment Effects in Digital Experimentation},
  journal = {arXiv preprint arXiv:1412.8563},
  year    = {2014},
}

@article{weisberg2015post,
  author    = {Weisberg, Herbert I and Pontes, Victor P},
  title     = {Post hoc subgroups in clinical trials: Anathema or analytics?},
  journal   = {Clinical Trials},
  volume    = {12},
  number    = {4},
  pages     = {357--364},
  year      = {2015},
  publisher = {SAGE Publications},
}

@article{tian2014simple,
  author    = {Tian, Lu and Alizadeh, Ash A and Gentles, Andrew J and Tibshirani, Robert},
  title     = {A simple method for estimating interactions between a treatment and a large number of covariates},
  journal   = {Journal of the American Statistical Association},
  volume    = {109},
  number    = {508},
  pages     = {1517--1532},
  year      = {2014},
  publisher = {Taylor \& Francis},
}

@article{dehejia2005program,
  author    = {Dehejia, Rajeev H},
  title     = {Program evaluation as a decision problem},
  journal   = {Journal of Econometrics},
  volume    = {125},
  number    = {1},
  pages     = {141--173},
  year      = {2005},
  publisher = {Elsevier},
}

@article{manski2004statistical,
  author    = {Manski, Charles F},
  title     = {Statistical treatment rules for heterogeneous populations},
  journal   = {Econometrica},
  volume    = {72},
  number    = {4},
  pages     = {1221--1246},
  year      = {2004},
  publisher = {JSTOR},
}

@article{hirano2009asymptotics,
  author    = {Hirano, Keisuke and Porter, Jack R},
  title     = {Asymptotics for statistical treatment rules},
  journal   = {Econometrica},
  volume    = {77},
  number    = {5},
  pages     = {1683--1701},
  year      = {2009},
  publisher = {JSTOR},
}

@article{kleinberg2015prediction,
  author    = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Obermeyer, Ziad},
  title     = {Prediction Policy Problems},
  journal   = {American Economic Review},
  volume    = {105},
  number    = {5},
  pages     = {491--495},
  year      = {2015},
  publisher = {American Economic Association},
}

@article{assmann2000subgroup,
  author    = {Assmann, Susan F and Pocock, Stuart J and Enos, Laura E and Kasten, Linda E},
  title     = {Subgroup analysis and other (mis) uses of baseline data in clinical trials},
  journal   = {The Lancet},
  volume    = {355},
  number    = {9209},
  pages     = {1064--1069},
  year      = {2000},
  publisher = {Elsevier},
}

@article{scharfstein1999adjusting,
  author    = {Scharfstein, Daniel O and Rotnitzky, Andrea and Robins, James M},
  title     = {Adjusting for nonignorable drop-out using semiparametric nonresponse models},
  journal   = {Journal of the American Statistical Association},
  volume    = {94},
  number    = {448},
  pages     = {1096--1120},
  year      = {1999},
  publisher = {Taylor \& Francis},
}

@article{bang2005doubly,
  author    = {Bang, Heejung and Robins, James M},
  title     = {Doubly robust estimation in missing data and causal inference models},
  journal   = {Biometrics},
  volume    = {61},
  number    = {4},
  pages     = {962--973},
  year      = {2005},
  publisher = {Wiley Online Library},
}

@article{hirano2003efficient,
  author  = {Hirano, Keisuke and Imbens, Guido W and Ridder, Geert},
  title   = {Efficient estimation of average treatment effects using the estimated propensity score},
  journal = {Econometrica},
  volume  = {71},
  number  = {4},
  pages   = {1161--1189},
  year    = {2003},
}

@article{mccaffrey2004propensity,
  author    = {McCaffrey, Daniel F and Ridgeway, Greg and Morral, Andrew R},
  title     = {Propensity score estimation with boosted regression for evaluating causal effects in observational studies},
  journal   = {Psychological Methods},
  volume    = {9},
  number    = {4},
  pages     = {403--425},
  year      = {2004},
  publisher = {American Psychological Association},
}

@article{westreich2010propensity,
  author    = {Westreich, Daniel and Lessler, Justin and Funk, Michele J},
  title     = {Propensity score estimation: Neural networks, support vector machines, decision trees ({CART}), and meta-classifiers as alternatives to logistic regression},
  journal   = {Journal of Clinical Epidemiology},
  volume    = {63},
  number    = {8},
  pages     = {826--833},
  year      = {2010},
  publisher = {Elsevier},
}

@article{foster2011subgroup,
  author    = {Foster, Jared C and Taylor, Jeremy MG and Ruberg, Stephen J},
  title     = {Subgroup identification from randomized clinical trial data},
  journal   = {Statistics in Medicine},
  volume    = {30},
  number    = {24},
  pages     = {2867--2880},
  year      = {2011},
  publisher = {Wiley Online Library},
}

@article{rubin1996matching,
  author    = {Rubin, Donald B and Thomas, Neal},
  title     = {Matching using estimated propensity scores: relating theory to practice},
  journal   = {Biometrics},
  volume    = {52},
  number    = {1},
  pages     = {249--264},
  year      = {1996},
  publisher = {JSTOR},
}

@article{chipman2010bart,
  author    = {Chipman, Hugh A and George, Edward I and McCulloch, Robert E},
  title     = {{BART}: {Bayesian} additive regression trees},
  journal   = {The Annals of Applied Statistics},
  volume    = {4},
  number    = {1},
  pages     = {266--298},
  year      = {2010},
  publisher = {JSTOR},
}

@article{hill2011bayesian,
  author  = {Hill, Jennifer L},
  title   = {{Bayesian} nonparametric modeling for causal inference},
  journal = {Journal of Computational and Graphical Statistics},
  volume  = {20},
  number  = {1},
  pages   = {217--240},
  year    = {2011},
}

@article{green2012modeling,
  author    = {Green, Donald P and Kern, Holger L},
  title     = {Modeling heterogeneous treatment effects in survey experiments with {Bayesian} additive regression trees},
  journal   = {Public Opinion Quarterly},
  volume    = {76},
  number    = {3},
  pages     = {491--511},
  year      = {2012},
  publisher = {AAPOR},
}

@article{rosenbaum1983central,
  author    = {Rosenbaum, Paul R and Rubin, Donald B},
  title     = {The central role of the propensity score in observational studies for causal effects},
  journal   = {Biometrika},
  volume    = {70},
  number    = {1},
  pages     = {41--55},
  year      = {1983},
  publisher = {Biometrika Trust},
}

@phdthesis{signorovitch2007identifying,
  author = {Signorovitch, James Edward},
  title  = {Identifying Informative Biological Markers in High-Dimensional Genomic Data and Clinical Trials},
  school = {Harvard University},
  year   = {2007},
}

@article{su2009subgroup,
  author    = {Su, Xiaogang and Tsai, Chih-Ling and Wang, Hansheng and Nickerson, David M and Li, Bogong},
  title     = {Subgroup analysis via recursive partitioning},
  journal   = {The Journal of Machine Learning Research},
  volume    = {10},
  pages     = {141--158},
  year      = {2009},
  publisher = {JMLR. org},
}

@article{imai2013estimating,
  author    = {Imai, Kosuke and Ratkovic, Marc},
  title     = {Estimating treatment effect heterogeneity in randomized program evaluation},
  journal   = {The Annals of Applied Statistics},
  volume    = {7},
  number    = {1},
  pages     = {443--470},
  year      = {2013},
  publisher = {Institute of Mathematical Statistics},
}

@article{rosenblum2011optimizing,
  author    = {Rosenblum, Michael and van der Laan, Mark J},
  title     = {Optimizing randomized trial designs to distinguish which subpopulations benefit from treatment},
  journal   = {Biometrika},
  volume    = {98},
  number    = {4},
  pages     = {845--860},
  year      = {2011},
  publisher = {Oxford University Press},
}

@article{breiman2001statistical,
  author    = {Breiman, Leo},
  title     = {Statistical modeling: The two cultures (with comments and a rejoinder by the author)},
  journal   = {Statistical Science},
  volume    = {16},
  number    = {3},
  pages     = {199--231},
  year      = {2001},
  publisher = {Institute of Mathematical Statistics},
}

@article{wager2014asymptotic,
  author  = {Wager, Stefan},
  title   = {Asymptotic Theory for Random Forests},
  journal = {arXiv preprint arXiv:1405.0352},
  year    = {2014},
}

@article{athey2015machine,
  author    = {Athey, Susan and Imbens, Guido},
  title     = {Recursive partitioning for heterogeneous causal effects},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {113},
  number    = {27},
  pages     = {7353--7360},
  year      = {2016},
  publisher = {National Acad Sciences},
}

@article{rubin1974estimating,
  author    = {Rubin, Donald B},
  title     = {Estimating causal effects of treatments in randomized and nonrandomized studies},
  journal   = {Journal of Educational Psychology},
  volume    = {66},
  number    = {5},
  pages     = {688--701},
  year      = {1974},
  publisher = {American Psychological Association},
}

@article{holland1986statistics,
  author    = {Holland, Paul W},
  title     = {Statistics and causal inference},
  journal   = {Journal of the American Statistical Association},
  volume    = {81},
  number    = {396},
  pages     = {945--960},
  year      = {1986},
  publisher = {Taylor \& Francis},
}

@book{imbens2015causal,
  author    = {Imbens, Guido W and Rubin, Donald B},
  title     = {Causal Inference in Statistics, Social, and Biomedical Sciences},
  publisher = {Cambridge University Press},
  year      = {2015},
}

@article{biau2010rate,
  author    = {Biau, G{\'e}rard and C{\'e}rou, Fr{\'e}d{\'e}ric and Guyader, Arnaud},
  title     = {On the rate of convergence of the bagged nearest neighbor estimate},
  journal   = {The Journal of Machine Learning Research},
  volume    = {11},
  pages     = {687--712},
  year      = {2010},
  publisher = {JMLR. org},
}

@article{samworth2012optimal,
  author    = {Samworth, Richard J},
  title     = {Optimal weighted nearest neighbour classifiers},
  journal   = {The Annals of Statistics},
  volume    = {40},
  number    = {5},
  pages     = {2733--2763},
  year      = {2012},
  publisher = {Institute of Mathematical Statistics},
}

@article{scornet2015consistency,
  author  = {Scornet, Erwan and Biau, G{\'e}rard and Vert, Jean-Philippe},
  title   = {Consistency of random forests},
  journal = {The Annals of Statistics},
  volume  = {43},
  number  = {4},
  pages   = {1716--1741},
  year    = {2015},
}


@article{wager2015uniform,
  author  = {Wager, Stefan and Walther, Guenther},
  title   = {Uniform Convergence of Random Forests via Adaptive Concentration},
  journal = {arXiv preprint arXiv:1503.06388},
  year    = {2015},
}

@misc{UCI,
  author      = {Bache, Kevin and Lichman, Moshe},
  title       = {{UCI} Machine Learning Repository},
  institution = {University of California, Irvine, School of Information and Computer Sciences},
  url         = {http://archive.ics.uci.edu/ml},
  year        = {2013},
  bdsk-url-1  = {http://archive.ics.uci.edu/ml},
}

@article{benjamini1995controlling,
  author    = {Benjamini, Yoav and Hochberg, Yosef},
  title     = {Controlling the false discovery rate: a practical and powerful approach to multiple testing},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume    = {57},
  number    = {1},
  pages     = {289--300},
  year      = {1995},
  publisher = {JSTOR},
}

@article{benjamini2001control,
  author    = {Benjamini, Yoav and Yekutieli, Daniel},
  title     = {The control of the false discovery rate in multiple testing under dependency},
  journal   = {The Annals of Statistics},
  volume    = {29},
  number    = {4},
  pages     = {1165--1188},
  year      = {2001},
  publisher = {JSTOR},
}

@article{bickel1981some,
  author    = {Bickel, Peter J and Freedman, David A},
  title     = {Some asymptotic theory for the bootstrap},
  journal   = {The Annals of Statistics},
  volume    = {9},
  number    = {6},
  pages     = {1196--1217},
  year      = {1981},
  publisher = {Institute of Mathematical Statistics},
}

@book{billingsley2008probability,
  author    = {Billingsley, Patrick},
  title     = {Probability and measure},
  publisher = {John Wiley \& Sons},
  year      = {2008},
}

@article{breiman1996bagging,
  author    = {Breiman, Leo},
  title     = {Bagging predictors},
  journal   = {Machine Learning},
  volume    = {24},
  number    = {2},
  pages     = {123--140},
  year      = {1996},
  publisher = {Springer},
}

@article{friedman2001greedy,
  author    = {Friedman, Jerome H},
  title     = {Greedy function approximation: a gradient boosting machine},
  journal   = {The Annals of Statistics},
  volume    = {29},
  number    = {5},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {JSTOR},
}

@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random forests},
  journal   = {Machine Learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer},
}

@article{brown2009nonparametric,
  author    = {Brown, Lawrence D. and Greenshtein, Eitan},
  title     = {Nonparametric empirical {Bayes} and compound decision approaches to estimation of a high-dimensional vector of normal means},
  journal   = {The Annals of Statistics},
  volume    = {37},
  number    = {4},
  pages     = {1685--1704},
  year      = {2009},
  publisher = {Institute of Mathematical Statistics},
}

@article{buhlmann2002analyzing,
  author    = {B{\"u}hlmann, Peter and Yu, Bin},
  title     = {Analyzing bagging},
  journal   = {The Annals of Statistics},
  volume    = {30},
  number    = {4},
  pages     = {927--961},
  year      = {2002},
  publisher = {Institute of Mathematical Statistics},
}

% Unpublished preprint: typed as a technical report, with the issuing lab as the institution.
@techreport{buja2000smoothing,
  author      = {Buja, Andreas and Stuetzle, Werner},
  title       = {Smoothing effects of bagging},
  institution = {AT\&T Labs-Research},
  type        = {Preprint},
  year        = {2000},
  publisher   = {Citeseer},
}

@article{buja2006observations,
  author  = {Buja, Andreas and Stuetzle, Werner},
  title   = {Observations on bagging},
  journal = {Statistica Sinica},
  volume  = {16},
  number  = {2},
  pages   = {323--351},
  year    = {2006},
}

@article{butucea2009adaptive,
  author    = {Butucea, C. and Comte, F.},
  title     = {Adaptive estimation of linear functionals in the convolution model and applications},
  journal   = {Bernoulli},
  volume    = {15},
  number    = {1},
  pages     = {69--98},
  year      = {2009},
  publisher = {Bernoulli Society for Mathematical Statistics and Probability},
}

@article{carroll1988optimal,
  author    = {Carroll, Raymond J. and Hall, Peter},
  title     = {Optimal rates of convergence for deconvolving a density},
  journal   = {Journal of the American Statistical Association},
  volume    = {83},
  number    = {404},
  pages     = {1184--1186},
  year      = {1988},
  publisher = {JSTOR},
}

@article{chen2003effects,
  author  = {Chen, Song Xi and Hall, Peter},
  title   = {Effects of bagging and bias correction on estimators defined by estimating equations},
  journal = {Statistica Sinica},
  volume  = {13},
  number  = {1},
  pages   = {97--110},
  year    = {2003},
}

@article{csorgo1981limit,
  author    = {Cs{\"o}rg{\H{o}}, S.},
  title     = {Limit behaviour of the empirical characteristic function},
  journal   = {The Annals of Probability},
  volume    = {9},
  number    = {1},
  pages     = {130--144},
  year      = {1981},
  publisher = {Institute of Mathematical Statistics},
}

@article{diaconis1985testing,
  author    = {Diaconis, P. and Efron, B.},
  title     = {Testing for independence in a two-way table: new interpretations of the chi-square statistic},
  journal   = {The Annals of Statistics},
  volume    = {13},
  number    = {3},
  pages     = {845--874},
  year      = {1985},
  publisher = {Institute of Mathematical Statistics},
}

% Duan (2011): PhD thesis on bootstrap variance estimators for bagging.
@phdthesis{duan2011bootstrap,
  author = {Duan, Jiangtao},
  title  = {Bootstrap-Based Variance Estimators for a Bagging Predictor.},
  school = {North Carolina State University},
  year   = {2011},
}

% Efron and Stein (1981): the jackknife estimate of variance.
@article{efron1981jackknife,
  author    = {Efron, Bradley and Stein, Charles},
  title     = {The jackknife estimate of variance},
  journal   = {The Annals of Statistics},
  volume    = {9},
  number    = {3},
  pages     = {586--596},
  year      = {1981},
  publisher = {JSTOR},
}

% Efron and Feldman (1991): compliance as an explanatory variable.
@article{efron1991compliance,
  author    = {Efron, Bradley and Feldman, David},
  title     = {Compliance as an explanatory variable in clinical trials},
  journal   = {Journal of the American Statistical Association},
  volume    = {86},
  number    = {413},
  pages     = {9--17},
  year      = {1991},
  publisher = {Taylor \& Francis Group},
}

% Efron (1992): jackknife-after-bootstrap standard errors.
@article{efron1992jackknife,
  author    = {Efron, Bradley},
  title     = {Jackknife-after-bootstrap standard errors and influence functions},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume    = {54},
  number    = {1},
  pages     = {83--127},
  year      = {1992},
  publisher = {JSTOR},
}

% Efron and Tibshirani (1994): An Introduction to the Bootstrap.
% The volume number refers to the monograph series named in the series field.
@book{efron1994introduction,
  author    = {Efron, Bradley and Tibshirani, Robert J.},
  title     = {An Introduction to the Bootstrap},
  series    = {Monographs on Statistics and Applied Probability},
  volume    = {57},
  publisher = {Chapman \& Hall/CRC},
  year      = {1994},
}

% Efron, Tibshirani, Storey and Tusher (2001): empirical Bayes for microarrays.
@article{efron2001empirical,
  author    = {Efron, B. and Tibshirani, R. and Storey, J. D. and Tusher, V.},
  title     = {Empirical {Bayes} analysis of a microarray experiment},
  journal   = {Journal of the American Statistical Association},
  volume    = {96},
  number    = {456},
  pages     = {1151--1160},
  year      = {2001},
  publisher = {ASA},
}

% Efron (2004): large-scale simultaneous hypothesis testing.
@article{efron2004large,
  author    = {Efron, B.},
  title     = {Large-scale simultaneous hypothesis testing},
  journal   = {Journal of the American Statistical Association},
  volume    = {99},
  number    = {465},
  pages     = {96--104},
  year      = {2004},
  publisher = {ASA},
}

% Efron (2007): correlation and large-scale significance testing.
@article{efron2007correlation,
  author    = {Efron, B.},
  title     = {Correlation and large-scale simultaneous significance testing},
  journal   = {Journal of the American Statistical Association},
  volume    = {102},
  number    = {477},
  pages     = {93--103},
  year      = {2007},
  publisher = {ASA},
}

% Efron (2007): size, power and false discovery rates.
@article{efron2007size,
  author    = {Efron, B.},
  title     = {Size, power and false discovery rates},
  journal   = {The Annals of Statistics},
  volume    = {35},
  number    = {4},
  pages     = {1351--1377},
  year      = {2007},
  publisher = {Institute of Mathematical Statistics},
}

% Efron (2010): Large-Scale Inference (book).
@book{efron2010large,
  author    = {Efron, B.},
  title     = {Large-Scale Inference: Empirical {Bayes} Methods for Estimation, Testing, and Prediction},
  publisher = {Cambridge University Press},
  year      = {2010},
}

% Efron (2011): Tweedie's formula and selection bias.
@article{efron2011tweedie,
  author    = {Efron, Bradley},
  title     = {Tweedie{'}s formula and selection bias},
  journal   = {Journal of the American Statistical Association},
  volume    = {106},
  number    = {496},
  pages     = {1602--1614},
  year      = {2011},
  publisher = {ASA},
}

% Efron (2014): estimation and accuracy after model selection, published version with discussion.
@article{efron2013estimation,
  author    = {Efron, Bradley},
  title     = {Estimation and Accuracy after Model Selection (with Discussion)},
  journal   = {Journal of the American Statistical Association},
  volume    = {109},
  number    = {507},
  pages     = {991--1007},
  year      = {2014},
  publisher = {Taylor \& Francis Group},
}

% Efron (2013): estimation and accuracy after model selection, just-accepted version.
% The publication status lives in the note field; it is not an issue number.
@article{efron2012model,
  author    = {Efron, Bradley},
  title     = {Estimation and Accuracy after Model Selection},
  journal   = {Journal of the American Statistical Association},
  year      = {2013},
  publisher = {Taylor \& Francis Group},
  note      = {Just-accepted manuscript},
}

% Fan (1991): optimal rates for nonparametric deconvolution.
@article{fan1991optimal,
  author    = {Fan, J.},
  title     = {On the optimal rates of convergence for nonparametric deconvolution problems},
  journal   = {The Annals of Statistics},
  volume    = {19},
  number    = {3},
  pages     = {1257--1272},
  year      = {1991},
  publisher = {JSTOR},
}

% Friedman and Hall (2007): bagging and nonlinear estimation.
@article{friedman2007bagging,
  author    = {Friedman, Jerome H. and Hall, Peter},
  title     = {On bagging and nonlinear estimation},
  journal   = {Journal of Statistical Planning and Inference},
  volume    = {137},
  number    = {3},
  pages     = {669--683},
  year      = {2007},
  publisher = {Elsevier},
}

% Golub et al. (1999): molecular classification of cancer from gene expression.
@article{golub1999molecular,
  author    = {Golub, T. R. and Slonim, D. K. and Tamayo, P. and Huard, C. and Gaasenbeek, M. and Mesirov, J. P. and Coller, H. and Loh, M. L. and Downing, J. R. and Caligiuri, M. A. and others},
  title     = {Molecular classification of cancer: class discovery and class prediction by gene expression monitoring},
  journal   = {Science},
  volume    = {286},
  number    = {5439},
  pages     = {531--537},
  year      = {1999},
  publisher = {American Association for the Advancement of Science},
}

% Grandvalet (2004): bagging equalizes influence.
@article{grandvalet2004bagging,
  author    = {Grandvalet, Yves},
  title     = {Bagging equalizes influence},
  journal   = {Machine Learning},
  volume    = {55},
  number    = {3},
  pages     = {251--270},
  year      = {2004},
  publisher = {Springer},
}

% Hall (1992): The Bootstrap and Edgeworth Expansion (a book, so typed as a book).
@book{hall1992bootstrap,
  author    = {Hall, Peter},
  title     = {The Bootstrap and {Edgeworth} Expansion},
  publisher = {Springer},
  year      = {1992},
}

% Hall and Samworth (2005): bagged nearest neighbour classifiers.
@article{hall2005properties,
  author    = {Hall, Peter and Samworth, Richard J.},
  title     = {Properties of bagged nearest neighbour classifiers},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {67},
  number    = {3},
  pages     = {363--379},
  year      = {2005},
  publisher = {Wiley Online Library},
}

% Hastie, Tibshirani and Friedman (2009): The Elements of Statistical Learning.
% The publisher's city is kept in the address field, separate from the publisher name.
@book{hastie2009elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning},
  publisher = {Springer},
  address   = {New York},
  year      = {2009},
}

% Hedenfalk et al. (2001): gene-expression profiles in hereditary breast cancer.
@article{hedenfalk2001gene,
  author    = {Hedenfalk, I. and Duggan, D. and Chen, Y. and Radmacher, M. and Bittner, M. and Simon, R. and Meltzer, P. and Gusterson, B. and Esteller, M. and Raffeld, M. and others},
  title     = {Gene-expression profiles in hereditary breast cancer},
  journal   = {New England Journal of Medicine},
  volume    = {344},
  number    = {8},
  pages     = {539--548},
  year      = {2001},
  publisher = {Mass Medical Soc},
}

% Hoeffding (1948): statistics with asymptotically normal distribution.
@article{hoeffding1948class,
  author    = {Hoeffding, Wassily},
  title     = {A class of statistics with asymptotically normal distribution},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {19},
  number    = {3},
  pages     = {293--325},
  year      = {1948},
  publisher = {Institute of Mathematical Statistics},
}

% Jaeckel (1972): The Infinitesimal Jackknife.
% NOTE(review): typed as a Bell Laboratories memorandum; confirm the memorandum number.
@techreport{jaeckel1972infinitesimal,
  author      = {Jaeckel, Louis A.},
  title       = {The Infinitesimal Jackknife},
  type        = {Memorandum},
  number      = {MM 72-1215-11},
  institution = {Bell Laboratories},
  year        = {1972},
}

% Jiang and Zhang (2009): general maximum likelihood empirical Bayes.
@article{jiang2009general,
  author    = {Jiang, W. and Zhang, C. H.},
  title     = {General maximum likelihood empirical {Bayes} estimation of normal means},
  journal   = {The Annals of Statistics},
  volume    = {37},
  number    = {4},
  pages     = {1647--1684},
  year      = {2009},
  publisher = {Institute of Mathematical Statistics},
}

% Jin and Cai (2007): estimating the null and the proportion of nonnull effects.
@article{jin2007estimating,
  author    = {Jin, J. and Cai, T. T.},
  title     = {Estimating the null and the proportion of nonnull effects in large-scale multiple comparisons},
  journal   = {Journal of the American Statistical Association},
  volume    = {102},
  number    = {478},
  pages     = {495--506},
  year      = {2007},
  publisher = {ASA},
}

% Lehmann and Casella (1998): Theory of Point Estimation.
@book{lehmann1998theory,
  author    = {Lehmann, Erich Leo and Casella, George},
  title     = {Theory of Point Estimation},
  volume    = {31},
  publisher = {Springer},
  year      = {1998},
}

% Liaw and Wiener (2002): the randomForest R package, in R News.
@article{liaw2002classification,
  author        = {Liaw, Andy and Wiener, Matthew},
  title         = {Classification and Regression by {randomForest}},
  journal       = {R News},
  volume        = {2},
  number        = {3},
  pages         = {18--22},
  year          = {2002},
  url           = {http://CRAN.R-project.org/doc/Rnews/},
  date-modified = {2014-03-28 23:34:10 +0000},
  bdsk-url-1    = {http://CRAN.R-project.org/doc/Rnews/},
}

% Owen (2005): variance of the number of false discoveries.
@article{owen2005variance,
  author    = {Owen, A. B.},
  title     = {Variance of the number of false discoveries},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {67},
  number    = {3},
  pages     = {411--426},
  year      = {2005},
  publisher = {Wiley Online Library},
}

% Mallows (1973): comments on the C_p statistic.
@article{mallows1973some,
  author    = {Mallows, Colin L.},
  title     = {Some comments on {$C_p$}},
  journal   = {Technometrics},
  volume    = {15},
  number    = {4},
  pages     = {661--675},
  year      = {1973},
  publisher = {Taylor \& Francis},
}

% Qiu, Klebanov and Yakovlev (2005): gene-expression correlation and empirical Bayes.
@article{qiu2005correlation,
  author    = {Qiu, X. and Klebanov, L. and Yakovlev, A.},
  title     = {Correlation between gene expression levels and limitations of the empirical {Bayes} methodology for finding differentially expressed genes},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  volume    = {4},
  number    = {1},
  pages     = {1157},
  year      = {2005},
  publisher = {Berkeley Electronic Press},
}

% Venables and Ripley (2002): Modern Applied Statistics with S, fourth edition.
@book{ripley2002modern,
  author     = {Venables, William N. and Ripley, Brian D.},
  title      = {Modern Applied Statistics with {S}},
  edition    = {Fourth},
  publisher  = {Springer},
  address    = {New York},
  year       = {2002},
  note       = {ISBN 0-387-95457-0},
  url        = {http://www.stats.ox.ac.uk/pub/MASS4},
  bdsk-url-1 = {http://www.stats.ox.ac.uk/pub/MASS4},
}

% Rosenblatt (1971): curve estimates.
@article{rosenblatt1971curve,
  author    = {Rosenblatt, M.},
  title     = {Curve estimates},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {42},
  number    = {6},
  pages     = {1815--1842},
  year      = {1971},
  publisher = {JSTOR},
}

% Rossouw et al. (1983): the CORIS baseline study.
@article{rossouw1983coronary,
  author  = {Rossouw, J. E. and Du Plessis, J. P. and Benad{\'e}, A. J. and Jordaan, P. C. and Kotze, J. P. and Jooste, P. L. and Ferreira, J. J.},
  title   = {Coronary risk factor screening in three rural communities. {The} {CORIS} baseline study},
  journal = {South African Medical Journal},
  volume  = {64},
  number  = {12},
  pages   = {430--436},
  year    = {1983},
}

% Serfling (1981): Approximation Theorems of Mathematical Statistics (a book, so typed as a book).
@book{serfling1981approximation,
  author    = {Serfling, Robert J.},
  title     = {Approximation Theorems of Mathematical Statistics},
  series    = {Wiley Series in Probability and Statistics},
  publisher = {Wiley-Interscience},
  year      = {1981},
}

% Sexton and Laake (2009): standard errors for bagged and random forest estimators.
@article{sexton2009standard,
  author    = {Sexton, Joseph and Laake, Petter},
  title     = {Standard errors for bagged and random forest estimators},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {53},
  number    = {3},
  pages     = {801--811},
  year      = {2009},
  publisher = {Elsevier},
}

% Stone (1977): consistent nonparametric regression.
@article{stone1977consistent,
  author    = {Stone, Charles J.},
  title     = {Consistent nonparametric regression},
  journal   = {The Annals of Statistics},
  volume    = {5},
  number    = {4},
  pages     = {595--620},
  year      = {1977},
  publisher = {JSTOR},
}

% Cerda, Varoquaux and Kegl (2018): similarity encoding for dirty categorical variables.
% NOTE(review): volume, issue and pages replace the online-first "1--18"; confirm against the publisher record.
@article{cerda2018similarity,
  author    = {Cerda, Patricio and Varoquaux, Ga{\"e}l and K{\'e}gl, Bal{\'a}zs},
  title     = {Similarity encoding for learning with dirty categorical variables},
  journal   = {Machine Learning},
  volume    = {107},
  number    = {8--10},
  pages     = {1477--1494},
  year      = {2018},
  publisher = {Springer},
}

% Rahimi and Recht (2008): random features for large-scale kernel machines.
@inproceedings{rahimi2008random,
  author    = {Rahimi, Ali and Recht, Benjamin},
  title     = {Random features for large-scale kernel machines},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1177--1184},
  year      = {2008},
}

% Farago and Lugosi (1993): universal consistency of neural network classifiers.
@article{farago1993strong,
  author    = {Farag{\'o}, Andr{\'a}s and Lugosi, G{\'a}bor},
  title     = {Strong universal consistency of neural network classifiers},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {39},
  number    = {4},
  pages     = {1146--1151},
  year      = {1993},
  publisher = {IEEE},
}

% Skurichina and Duin (1998): bagging for linear classifiers.
@article{skurichina1998bagging,
  author    = {Skurichina, Marina and Duin, Robert P. W.},
  title     = {Bagging for linear classifiers},
  journal   = {Pattern Recognition},
  volume    = {31},
  number    = {7},
  pages     = {909--930},
  year      = {1998},
  publisher = {Elsevier},
}

% Stamey et al. (1989): prostate specific antigen, part II.
@article{stamey1989prostate,
  author        = {Stamey, Thomas A. and Kabalin, John N. and McNeal, John E. and Johnstone, Iain M. and Freiha, Fuad and Redwine, E. A. and Yang, N.},
  title         = {Prostate specific antigen in the diagnosis and treatment of adenocarcinoma of the prostate. {II.} {Radical} prostatectomy treated patients},
  journal       = {The Journal of Urology},
  volume        = {141},
  number        = {5},
  pages         = {1076--1083},
  year          = {1989},
  date-modified = {2014-03-28 23:34:31 +0000},
}

% Stefanski and Carroll (1990): deconvoluting kernel density estimators.
@article{stefanski1990deconvoluting,
  author    = {Stefanski, L. A. and Carroll, R. J.},
  title     = {Deconvoluting kernel density estimators},
  journal   = {Statistics: A Journal of Theoretical and Applied Statistics},
  volume    = {21},
  number    = {2},
  pages     = {169--184},
  year      = {1990},
  publisher = {Taylor \& Francis},
}

% van der Vaart (2000): Asymptotic Statistics.
% The number field is the position within the series named in the series field.
@book{van2000asymptotic,
  author        = {van der Vaart, Aad W.},
  title         = {Asymptotic Statistics},
  series        = {Cambridge Series in Statistical and Probabilistic Mathematics},
  number        = {3},
  publisher     = {Cambridge University Press},
  year          = {2000},
  date-modified = {2014-05-01 01:33:02 +0000},
}

% Van't Wout et al. (2003): cellular gene expression upon HIV-1 infection.
@article{vantwout2003cellular,
  author    = {Van't Wout, A. B. and Lehrman, G. K. and Mikheeva, S. A. and O'Keeffe, G. C. and Katze, M. G. and Bumgarner, R. E. and Geiss, G. K. and Mullins, J. I.},
  title     = {Cellular gene expression upon human immunodeficiency virus type 1 infection of {CD4+-T-cell} lines},
  journal   = {Journal of Virology},
  volume    = {77},
  number    = {2},
  pages     = {1392--1402},
  year      = {2003},
  publisher = {Am Soc Microbiol},
}

% Zhang (1997): empirical Bayes and compound estimation of normal means.
@article{zhang1997empirical,
  author  = {Zhang, C. H.},
  title   = {Empirical {Bayes} and compound estimation of normal means},
  journal = {Statistica Sinica},
  volume  = {7},
  pages   = {181--194},
  year    = {1997},
}

% Zhang (2005): general empirical Bayes wavelet methods.
@article{zhang2005general,
  author    = {Zhang, C. H.},
  title     = {General empirical {Bayes} wavelet methods and exactly adaptive minimax estimation},
  journal   = {The Annals of Statistics},
  volume    = {33},
  number    = {1},
  pages     = {54--100},
  year      = {2005},
  publisher = {Institute of Mathematical Statistics},
}

% Friedman (2002): stochastic gradient boosting.
@article{friedman2002stochastic,
  author    = {Friedman, Jerome H.},
  title     = {Stochastic gradient boosting},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {38},
  number    = {4},
  pages     = {367--378},
  year      = {2002},
  publisher = {Elsevier},
}

% Strobl, Boulesteix, Zeileis and Hothorn (2007): bias in random forest variable importance.
@article{strobl2007bias,
  author    = {Strobl, Carolin and Boulesteix, Anne-Laure and Zeileis, Achim and Hothorn, Torsten},
  title     = {Bias in random forest variable importance measures: Illustrations, sources and a solution},
  journal   = {BMC Bioinformatics},
  volume    = {8},
  number    = {1},
  pages     = {25},
  year      = {2007},
  publisher = {BioMed Central Ltd},
}

% Dietterich (2000): comparing bagging, boosting and randomization for tree ensembles.
@article{dietterich2000experimental,
  author    = {Dietterich, Thomas G.},
  title     = {An experimental comparison of three methods for constructing ensembles of decision trees: Bagging, boosting, and randomization},
  journal   = {Machine Learning},
  volume    = {40},
  number    = {2},
  pages     = {139--157},
  year      = {2000},
  publisher = {Springer},
}

% Geurts, Ernst and Wehenkel (2006): extremely randomized trees.
@article{geurts2006extremely,
  author    = {Geurts, Pierre and Ernst, Damien and Wehenkel, Louis},
  title     = {Extremely randomized trees},
  journal   = {Machine Learning},
  volume    = {63},
  number    = {1},
  pages     = {3--42},
  year      = {2006},
  publisher = {Springer},
}

% Biau, Devroye and Lugosi (2008): consistency of random forests.
@article{biau2008consistency,
  author    = {Biau, G{\'e}rard and Devroye, Luc and Lugosi, G{\'a}bor},
  title     = {Consistency of random forests and other averaging classifiers},
  journal   = {The Journal of Machine Learning Research},
  volume    = {9},
  pages     = {2015--2033},
  year      = {2008},
  publisher = {JMLR. org},
}

% Biau (2012): analysis of a random forests model.
@article{biau2012analysis,
  author    = {Biau, G{\'e}rard},
  title     = {Analysis of a random forests model},
  journal   = {The Journal of Machine Learning Research},
  volume    = {13},
  pages     = {1063--1095},
  year      = {2012},
  publisher = {JMLR. org},
}

% Meinshausen (2006): quantile regression forests.
@article{meinshausen2006quantile,
  author    = {Meinshausen, Nicolai},
  title     = {Quantile regression forests},
  journal   = {The Journal of Machine Learning Research},
  volume    = {7},
  pages     = {983--999},
  year      = {2006},
  publisher = {JMLR. org},
}

% Lin and Jeon (2006): random forests and adaptive nearest neighbors.
@article{lin2006random,
  author    = {Lin, Yi and Jeon, Yongho},
  title     = {Random forests and adaptive nearest neighbors},
  journal   = {Journal of the American Statistical Association},
  volume    = {101},
  number    = {474},
  pages     = {578--590},
  year      = {2006},
  publisher = {Taylor \& Francis},
}

% Wager (2013): arXiv preprint on subsampling extremes.
@article{wager2013subsampling,
  author  = {Wager, Stefan},
  title   = {Subsampling Extremes: From Block Maxima to Smooth Tail Estimation},
  journal = {arXiv preprint arXiv:1204.0316},
  year    = {2013},
}

% Wager, Hastie and Efron (2014): confidence intervals for random forests.
@article{wager2014confidence,
  author        = {Wager, Stefan and Hastie, Trevor and Efron, Bradley},
  title         = {Confidence Intervals for Random Forests: The Jackknife and the Infinitesimal Jackknife},
  journal       = {The Journal of Machine Learning Research},
  volume        = {15},
  pages         = {1625--1651},
  year          = {2014},
  date-modified = {2014-05-02 06:44:00 +0000},
}

% Breiman, Friedman, Stone and Olshen (1984): Classification and Regression Trees.
@book{breiman1984classification,
  author    = {Breiman, Leo and Friedman, Jerome and Stone, Charles J. and Olshen, Richard A.},
  title     = {Classification and Regression Trees},
  publisher = {CRC Press},
  year      = {1984},
}

% Politis, Romano and Wolf (1999): Subsampling.
% The publisher's city is kept in the address field, separate from the publisher name.
@book{politis1999subsampling,
  author        = {Politis, Dimitris N. and Romano, Joseph P. and Wolf, Michael},
  title         = {Subsampling},
  series        = {Springer Series in Statistics},
  publisher     = {Springer},
  address       = {New York},
  year          = {1999},
  date-modified = {2014-05-01 01:33:54 +0000},
}

% Hajek (1968): asymptotic normality of simple linear rank statistics.
@article{hajek1968asymptotic,
  author        = {H{\'a}jek, Jaroslav},
  title         = {Asymptotic normality of simple linear rank statistics under alternatives},
  journal       = {The Annals of Mathematical Statistics},
  volume        = {39},
  number        = {2},
  pages         = {325--346},
  year          = {1968},
  publisher     = {Institute of Mathematical Statistics},
  date-modified = {2014-05-08 22:49:18 +0000},
}

% Breiman (2004): technical report on consistency of a simple random forest model.
@techreport{breiman2004consistency,
  author      = {Breiman, Leo},
  title       = {Consistency for a simple model of random forests},
  type        = {Technical Report},
  number      = {670},
  institution = {Department of Statistics, University of California at Berkeley},
  year        = {2004},
}

% Harrison and Rubinfeld (1978): hedonic housing prices and clean air.
% The first author uses the "Last, Jr., First" name form.
@article{harrison1978hedonic,
  author    = {Harrison, Jr., David and Rubinfeld, Daniel L.},
  title     = {Hedonic housing prices and the demand for clean air},
  journal   = {Journal of Environmental Economics and Management},
  volume    = {5},
  number    = {1},
  pages     = {81--102},
  year      = {1978},
  publisher = {Elsevier},
}

% Cortez and Morais (2007): predicting forest fires from meteorological data.
% NOTE(review): publisher and page range should be confirmed against the proceedings.
@incollection{cortez2007data,
  author    = {Cortez, Paulo and Morais, An{\'\i}bal de Jesus Raimundo},
  title     = {A data mining approach to predict forest fires using meteorological data},
  booktitle = {New Trends in Artificial Intelligence, Proceedings of the 13th EPIA 2007 - Portuguese Conference on Artificial Intelligence},
  editor    = {Neves, J. and Santos, M. F. and Machado, J.},
  pages     = {512--523},
  publisher = {APPIA},
  address   = {Guimar{\~a}es, Portugal},
  year      = {2007},
}

% Mentch and Hooker (2014): arXiv preprint on ensemble trees and CLTs.
@article{mentch2014ensemble,
  author  = {Mentch, Lucas and Hooker, Giles},
  title   = {Ensemble Trees and {CLT}s: Statistical Inference for Supervised Learning},
  journal = {arXiv preprint arXiv:1404.6473},
  year    = {2014},
}

% Denil, Matheson and De Freitas (2014): random forests in theory and in practice.
@inproceedings{denil2014narrowing,
  author    = {Denil, Misha and Matheson, David and De Freitas, Nando},
  title     = {Narrowing the Gap: Random Forests In Theory and In Practice},
  booktitle = {Proceedings of The 31st International Conference on Machine Learning},
  pages     = {665--673},
  year      = {2014},
}



%% New references from Vitor and Jonathan's experimental setup version
% Ames housing dataset
@article{de2011ames,
  author    = {De Cock, Dean},
  title     = {{Ames}, {Iowa}: Alternative to the {Boston} housing data as an end of semester regression project},
  journal   = {Journal of Statistics Education},
  volume    = {19},
  number    = {3},
  year      = {2011},
  publisher = {Taylor \& Francis},
}

% Dmitry and Imbens' paper on fixed effects 
@techreport{arkhangelsky2018role,
  author      = {Arkhangelsky, Dmitry and Imbens, Guido},
  title       = {The role of the propensity score in fixed effect models},
  institution = {National Bureau of Economic Research},
  year        = {2018},
}

% Bai and Ng articles on factor models
@article{bai2002determining,
  author    = {Bai, Jushan and Ng, Serena},
  title     = {Determining the number of factors in approximate factor models},
  journal   = {Econometrica},
  volume    = {70},
  number    = {1},
  pages     = {191--221},
  year      = {2002},
  publisher = {Wiley Online Library},
}

% Really clear and useful explanation of different coding methods
@misc{venables2016codingmatrices,
  author = {Venables, W. N.},
  title  = {{codingMatrices}: Alternative factor coding matrices for linear model formulae [Software]},
  year   = {2016},
}


% Kevin Murphy's ML textbook
@book{murphy2012machine,
  author    = {Murphy, Kevin P.},
  title     = {Machine Learning: A Probabilistic Perspective},
  publisher = {MIT Press},
  year      = {2012},
}

%Sparse PCA paper
@article{zou2006sparse,
  author  = {Zou, Hui and Hastie, Trevor and Tibshirani, Robert},
  title   = {Sparse principal component analysis},
  journal = {Journal of Computational and Graphical Statistics},
  volume  = {15},
  number  = {2},
  pages   = {265--286},
  year    = {2006},
}
%Zou, Hui, Trevor Hastie, and Robert Tibshirani. "Sparse principal component analysis." Journal of computational and graphical statistics 15.2 (2006): 265-286.

% We use the reference below for the notation on PCA.

% Hastie, Tibshirani, Friedman ESL textbook
@book{hastie2016elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning: Data Mining, Inference, and Prediction},
  edition   = {Second},
  publisher = {Springer},
  year      = {2016},
}

% Kaggle dataset on education performance in Pakistan.
@online{pakistanEducation,
  author  = {Hemani, Mesum Raza},
  title   = {{Pakistan} Education Performance Dataset},
  year    = {2017},
  url     = {https://www.kaggle.com/mesumraza/pakistan-education-performance-dataset/},
  urldate = {2018-12-23},
}

% Kaggle dataset of house sales in King County; the author field is the uploader's user name.
@online{houseSalesKingCounty,
  author  = {harlfoxem},
  title   = {House Sales in {King County}, {USA}},
  year    = {2016},
  url     = {https://www.kaggle.com/harlfoxem/housesalesprediction/},
  urldate = {2018-12-23},
}
% Chen and Guestrin (2016): the XGBoost tree boosting system.
@inproceedings{chen2016xgboost,
  author       = {Chen, Tianqi and Guestrin, Carlos},
  title        = {{XGBoost}: A scalable tree boosting system},
  booktitle    = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages        = {785--794},
  year         = {2016},
  organization = {ACM},
}

% Cook (2007): Fisher lecture on dimension reduction in regression.
@article{cook2007fisher,
  author    = {Cook, R. Dennis},
  title     = {{Fisher} lecture: Dimension reduction in regression},
  journal   = {Statistical Science},
  volume    = {22},
  number    = {1},
  pages     = {1--26},
  year      = {2007},
  publisher = {Institute of Mathematical Statistics},
}